Code
# Notebook setup: imports, run configuration, and dataset loading.
import time

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from itables import show
from plotly.subplots import make_subplots

from utils.mimic_load import load_csv, segment_signal_by_label

# Seed taken from the wall clock, so it changes on every run.
RANDOM_SEED = int(time.time())
USER_SELECT = 1
SEGMENT_SIZE = 3750  # Equivalent to 30 s of samples
# Sampling frequency in Hz. The Time column advances by 0.008 s per sample
# (125 Hz), which also matches SEGMENT_SIZE = 3750 samples for 30 s; the
# previous value of 1250 was inconsistent with both.
FS = 125
FOLDER_PATH = "./dataset/mimic_perform_af_csv"

# NOTE(review): load_csv is a project helper; its exact handling of
# USER_SELECT is not visible here -- confirm against utils.mimic_load.
final_df = load_csv(FOLDER_PATH, USER_SELECT)
filtered_df = final_df[final_df['label'] == 0]
final_df.head()
| | Time | PPG | ECG | resp | numb_user | label |
|---|---|---|---|---|---|---|
| 0 | 0.000 | 0.537634 | 0.425781 | -0.029340 | 1 | 0 |
| 1 | 0.008 | 0.534702 | 0.404297 | -0.036675 | 1 | 0 |
| 2 | 0.016 | 0.531769 | 0.400391 | -0.044010 | 1 | 0 |
| 3 | 0.024 | 0.528837 | 0.400391 | -0.053790 | 1 | 0 |
| 4 | 0.032 | 0.524927 | 0.419922 | -0.061125 | 1 | 0 |
final_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2100014 entries, 0 to 2100013
Data columns (total 6 columns):
# Column Dtype
--- ------ -----
0 Time float64
1 PPG float64
2 ECG float64
3 resp float64
4 numb_user int64
5 label int64
dtypes: float64(4), int64(2)
memory usage: 96.1 MB
final_df.describe()
| | Time | PPG | ECG | resp | numb_user | label |
|---|---|---|---|---|---|---|
| count | 2.100014e+06 | 2.100014e+06 | 2.100014e+06 | 2.100014e+06 | 2.100014e+06 | 2.100014e+06 |
| mean | 6.000000e+02 | 1.193070e+00 | 3.920216e-01 | 2.226942e-01 | 1.057143e+01 | 9.285714e-01 |
| std | 3.464126e+02 | 7.767317e-01 | 2.349270e-01 | 4.319208e-01 | 5.827452e+00 | 2.575394e-01 |
| min | 0.000000e+00 | 0.000000e+00 | -5.019608e-01 | -1.846506e+00 | 1.000000e+00 | 0.000000e+00 |
| 25% | 3.000000e+02 | 4.633431e-01 | 2.346041e-01 | -4.156479e-02 | 6.000000e+00 | 1.000000e+00 |
| 50% | 6.000000e+02 | 8.064516e-01 | 3.847656e-01 | 2.200000e-01 | 1.150000e+01 | 1.000000e+00 |
| 75% | 9.000000e+02 | 1.893451e+00 | 5.195312e-01 | 4.643077e-01 | 1.600000e+01 | 1.000000e+00 |
| max | 1.200000e+03 | 4.001955e+00 | 1.503922e+00 | 2.844215e+00 | 1.900000e+01 | 1.000000e+00 |
final_df.shape
(2100014, 6)
# Exploratory plots of the PPG and resp columns of filtered_df.

# Histograms: melt the selected columns into long form so each variable
# gets its own colour and its own facet.
columns_to_plot = ["PPG", "resp"]
plot_df = filtered_df[columns_to_plot]
fig = px.histogram(
    plot_df.melt(var_name="Variable", value_name="Valor"),
    x="Valor",
    color="Variable",
    facet_row="Variable",  # facet by row instead of by column
    title="Distribuciones de las Variables Seleccionadas",
    nbins=50,
)
fig.show()

# Box plots of both signals side by side, used to spot outliers.
fig = go.Figure()
fig.add_trace(go.Box(y=filtered_df['PPG'], name='PPG'))
fig.add_trace(go.Box(y=filtered_df['resp'], name='Resp'))
fig.update_layout(
    title="Boxplots para Identificación de Valores Atípicos",
    xaxis_title="Variables",
    yaxis_title="Valores",
    template="plotly_white",
)
fig.show()

# Scatter plot of PPG against resp.
fig = px.scatter(filtered_df, x='PPG', y='resp', title='Scatter Plot: PPG vs resp')
fig.show()